import scrapy
from guoxueNamer.items import GuoxuenamerItem
import re
class GuoxuenameSpider(scrapy.Spider):
    """Crawl the guoxue.shufaji.com index page and yield one item per book link."""

    name = 'guoxueName'
    allowed_domains = ['guoxue.shufaji.com']
    start_urls = ['http://guoxue.shufaji.com/']

    def parse(self, response):
        """Parse the start page: walk every book-name table and yield an item per link.

        Args:
            response: the Scrapy Response for a start URL.

        Yields:
            GuoxuenamerItem with:
                art_name: the text of the table cell containing the book link.
                art_url:  absolute URL of the book page.
        """
        # The content area sits in the second row of the first centered table.
        content = response.xpath('//html/body/center[1]/table/tr[2]/td')
        # Each <table class="tbC"> lists the books of one category.
        for book_table in content.xpath('.//table[@class="tbC"]'):
            for cell in book_table.xpath('.//tr/td'):
                # Skip spacer/layout cells that contain no anchor.
                if not cell.xpath('.//a'):
                    continue
                # BUG FIX: create a fresh item per yield. The original reused a
                # single GuoxuenamerItem instance and mutated it in the loop, so
                # every yielded reference pointed at the same object and
                # downstream consumers could see the last book's data for all
                # items.
                item = GuoxuenamerItem()
                item["art_name"] = cell.xpath('.//text()').extract_first()
                # BUG FIX: response.urljoin() joins relative and absolute hrefs
                # correctly; the original `start_urls[0] + href` produced broken
                # URLs (e.g. a double slash) for root-relative links.
                item["art_url"] = response.urljoin(
                    cell.xpath('.//@href').extract_first()
                )
                yield item
        # NOTE(review): the original had commented-out extraction of a book
        # category into item['art_type'] (from div.tit2, stripping U+3000);
        # it was never wired up, so it is not reproduced here.